Abstract

This post serves as an introduction into an exploration of the Diabetes Epidemic in North Carolina. Through a series of post the project will evovle to explore the data avialable and provide possible solutions to the problem. This idea is based off a report written as my Masters Capstone. This post will answer the following questions

  1. What is the overall trend of diabetes in North Carolina.

Enviroment

# Attach these packages so their functions don't need to be qualified: http://r-pkgs.had.co.nz/namespace.html#search-path
library(magrittr) # enables piping : %>%
library(dplyr)    # data wrangling
library(ggplot2)  # graphs
library(tidyr)    # data tidying
library(maps)
library(mapdata)
library(sf)
library(gganimate)
#set ggplot theme
ggplot2::theme_set(theme_bw())
#Put code in here.  It doesn't call a chunk in the codebehind file.

Data

The data for this exploration comes from several different sources:

  1. The Diabetes data was taken from the US Diabetes Surveillance System; www.cdc.gov/diabetes/data; Division of Diabetes Translation - Centers for Disease Control and Prevention. The data was downloaded by year, and compiled into one set.

  2. The list of rural counties is compiled from The US Census Bureau, it includes all North Carolina Counties that are at least 90% rural, more on the topic can be found here Rural America

# load the data, and have all column names in lowercase

nc_diabetes_data_raw <- readr::read_rds("./data-public/derived/nc-diabetes-data.rds") %>% 
  rename_all(tolower)

us_diabetes_data_raw <- readr::read_csv("data-public/raw/us_diabetes_totals.csv", 
                                        skip = 2)

rural_counties <- readr::read_csv("./data-public/metadata/rural-counties.csv")

county_centers_raw <- readxl::read_xlsx("./data-public/raw/nc_county_centers.xlsx", col_names = c("county", "lat","long"))

Data Tweaks

The Diabetes data comes quite tidy from the CDC, the script to combine each year can be found here The only tweaks done here are to combine the rural countines column, and the map data for creating maps.

county_centers <- county_centers_raw %>% 
  mutate_all(~stringr::str_replace_all(.,
                                       c("\\°"  = ""
                                         ,"\\+" = ""
                                         ,"\\–" = "-"
                                        )
                                      )
                                    ) %>% 
  mutate_at(c("lat","long"),as.numeric) %>% 
  mutate_at("county", tolower)


us_diabetes_data <- us_diabetes_data_raw %>% 
  filter(Year >= 2006) %>% 
  select( "Year","Total - Percentage") %>% 
  rename(year = Year , us_pct = `Total - Percentage`)

#join us totals

nc_diabetes_data <- nc_diabetes_data_raw %>% 
  mutate(
    rural = county %in% rural_counties$rural_counties
  ) %>% 
  mutate_at("county",tolower) %>% 
  left_join(us_diabetes_data)

Overall

When examining North Carolina as a whole we can see that NC has been trending much higher than the United States as a whole. We see that in 2016 there was a large spike in diagnosied cases, unfortunally this is the last year of data available to see if this upward trend contiunes.

nc_diabetes_data %>% 
  group_by(year) %>% 
  summarise(
    pct = mean(percentage)
    ,us_pct = mean(us_pct)
  ) %>% 
  pivot_longer(
    cols       = c("pct", "us_pct")
    ,names_to  = "metric"
    ,values_to = "values"
  ) %>% 
  mutate(
    metric = factor(metric
                    ,levels = c("pct","us_pct")
                    ,labels = c("NC", "National"))
  ) %>% 
  ggplot(aes(x = year, y = values, color = metric)) +
  geom_line() +
  geom_point(shape = 21, size = 3) +
  scale_y_continuous(labels = function(x) paste0(x, "%")) +
  scale_color_brewer(palette = "Dark2") +
  labs(
    x      = NULL
    ,y     = NULL
    ,color = NULL
    ,title = "Percent of Adults (20+) with Diagnosed Diabetes"
  )

When examing the further breakdown of Urban Vs Rural we see that while all of North Carolina has a higher percentage of diagnosied cases than the US average, Rural North Carolina has a higher percetage of cases vs Urban NC.

d <- nc_diabetes_data %>% 
  select(-us_pct) %>% 
  mutate(
    pct_rural  = if_else(rural == TRUE, percentage, NULL)
    ,pct_urban = if_else(rural == FALSE, percentage, NULL)
  ) %>% 
  select(-countyfips,-percentage) %>% 
  group_by(year) %>% 
  summarise(
    pct_rural = mean(pct_rural,na.rm = TRUE)
    ,pct_urban = mean(pct_urban,na.rm = TRUE)
  ) %>% left_join(us_diabetes_data) %>% 
  pivot_longer(
    cols       = c("us_pct", "pct_rural","pct_urban")
    ,names_to  = "metric"
    ,values_to = "value"
    ,values_drop_na = TRUE
  ) %>% 
  mutate(
    metric = factor(metric,
                    levels  = c("pct_rural","pct_urban","us_pct")
                    ,labels = c("Rural","Urban","US")
                    )
  )



d %>% ggplot(aes(x = year, y = value, color = metric)) +
  geom_line() +
  geom_point(shape = 21, size = 3) +
  # geom_smooth(method = "lm",se = FALSE) +
  scale_y_continuous(labels = function(x) paste0(x, "%")) +
  scale_color_brewer(palette = "Dark2") +
  labs(
    x      = NULL
    ,y     = NULL
    ,color = NULL
    ,title = "Percent of Adults (20+) with Diagnosed Diabetes \nDisplaying Rural vs Urban"
  ) 

  # ggpmisc::stat_poly_eq(formula = y ~ + x 
  #                     ,aes(label = paste(..eq.label.., ..rr.label.., sep = "~~~"))
  #                       ,parse = TRUE
  #                       )

By County

# 2006 Map Graph

counties <- st_as_sf(map("county",region = "north carolina", plot = FALSE,fill = TRUE)) %>% 
  mutate_at("ID", ~stringr::str_remove(.,"north carolina,")) %>% 
  left_join(nc_diabetes_data, by = c("ID" = "county")) 


county_centers <- st_as_sf(county_centers, coords = c("long","lat")
                           ,remove = FALSE, agr = "constant", crs = 4326)  


county_centers <- county_centers  %>% 
  left_join(nc_diabetes_data) %>% 
  mutate(
    rural = if_else(rural,"R","U")
  )

county_centers_2006 <- county_centers %>% filter(year == 2006) 
county_centers_2016 <- county_centers %>% filter(year == 2016)


counties %>% 
  filter(year == 2006) %>% 
  ggplot() +
  geom_sf(aes(fill = rural)) +
  geom_sf(data = county_centers_2006
          ,aes(size = percentage)
          ,shape = 21
          ,fill = "#0571b0"
          ,color = "black"
          ,alpha = 0.8) +
  scale_size(range = c(1,10)) +
  scale_fill_viridis_d(alpha = 0.5, direction = -1) +
  guides(
    fill = guide_legend(title = "Rural")
    ,size = guide_legend(title = "Percentage")
  ) +
  labs(
    title = "Diagnosied Diabetes by County 2006"
  )

counties %>% 
  filter(year == 2006) %>% 
  ggplot() +
  geom_sf(aes(fill = percentage)) +
  scale_fill_viridis_c(alpha = 0.6, direction = -1) +
  geom_sf_text(aes(label = rural), data = county_centers_2006, color = "#666666") +
  labs(
    title = "Diagnosied Diabetes by County 2006"
    ,x    = NULL
    ,y    = NULL
    ,fill = "Percentage"
  )

#2016 Map

counties %>% 
  filter(year == 2016) %>% 
  ggplot() +
  geom_sf(aes(fill = rural)) +
  geom_sf(data = county_centers_2016
          ,aes(size = percentage)
          ,shape = 21
          ,fill = "#0571b0"
          ,color = "black"
          ,alpha = 0.8) +
  scale_size(range = c(1,10)) +
  scale_fill_viridis_d(alpha = 0.5, direction = -1) +
  guides(
    fill = guide_legend(title = "Rural")
    ,size = guide_legend(title = "Percentage")
  ) +
  labs(
    title = "Diagnosied Diabetes by County 2016"
  )

counties %>% 
  filter(year == 2016) %>% 
  mutate(
   percentage = if_else(percentage <25,percentage, NULL)
  ) %>% 
  ggplot() +
  geom_sf(aes(fill = percentage)) +
  scale_fill_viridis_c(alpha = 0.6
                       ,direction = -1
  ) +
  geom_sf_text(aes(label = rural), data = county_centers_2016, color = "#666666") +
  labs(
    title = "Diagnosied Diabetes by County 2016"
    ,x    = NULL
    ,y    = NULL
    ,fill = "Percentage"
    ,caption = "Note : Jones County = 27.1%"
  )

g <- counties %>% 
  ggplot() +
  geom_sf(aes(fill = percentage)) +
  scale_fill_viridis_c(alpha = 0.6
                       ,direction = -1
  ) +
  transition_manual(year)

g <-  animate(g,end_pause = 10)

anim_save("./analysis/blogposts/basic-exploration/figure_rmd/animate_1.gif")

session information

For the sake of documentation and reproducibility, the current report was rendered in the following environment. Click the line below to expand.

Environment

- Session info -------------------------------------------------------------------------------------------------------
 setting  value                       
 version  R version 3.6.2 (2019-12-12)
 os       Windows 10 x64              
 system   x86_64, mingw32             
 ui       RTerm                       
 language (EN)                        
 collate  English_United States.1252  
 ctype    English_United States.1252  
 tz       America/New_York            
 date     2020-05-14                  

- Packages -----------------------------------------------------------------------------------------------------------
 package      * version date       lib source        
 assertthat     0.2.1   2019-03-21 [1] CRAN (R 3.6.1)
 backports      1.1.5   2019-10-02 [1] CRAN (R 3.6.1)
 callr          3.4.1   2020-01-24 [1] CRAN (R 3.6.2)
 cellranger     1.1.0   2016-07-27 [1] CRAN (R 3.6.1)
 class          7.3-15  2019-01-01 [2] CRAN (R 3.6.2)
 classInt       0.4-3   2020-04-07 [1] CRAN (R 3.6.3)
 cli            2.0.1   2020-01-08 [1] CRAN (R 3.6.2)
 colorspace     1.4-1   2019-03-18 [1] CRAN (R 3.6.1)
 crayon         1.3.4   2017-09-16 [1] CRAN (R 3.6.1)
 DBI            1.1.0   2019-12-15 [1] CRAN (R 3.6.2)
 desc           1.2.0   2018-05-01 [1] CRAN (R 3.6.2)
 devtools       2.2.1   2019-09-24 [1] CRAN (R 3.6.2)
 digest         0.6.21  2019-09-20 [1] CRAN (R 3.6.1)
 dplyr        * 0.8.3   2019-07-04 [1] CRAN (R 3.6.1)
 e1071          1.7-3   2019-11-26 [1] CRAN (R 3.6.3)
 ellipsis       0.3.0   2019-09-20 [1] CRAN (R 3.6.1)
 evaluate       0.14    2019-05-28 [1] CRAN (R 3.6.1)
 fansi          0.4.1   2020-01-08 [1] CRAN (R 3.6.2)
 farver         2.0.3   2020-01-16 [1] CRAN (R 3.6.2)
 fs             1.3.1   2019-05-06 [1] CRAN (R 3.6.1)
 gganimate    * 1.0.5   2020-02-09 [1] CRAN (R 3.6.2)
 ggplot2      * 3.3.0   2020-03-05 [1] CRAN (R 3.6.3)
 gifski         0.8.6   2018-09-28 [1] CRAN (R 3.6.2)
 glue           1.3.1   2019-03-12 [1] CRAN (R 3.6.1)
 gtable         0.3.0   2019-03-25 [1] CRAN (R 3.6.1)
 hms            0.5.3   2020-01-08 [1] CRAN (R 3.6.2)
 htmltools      0.4.0   2019-10-04 [1] CRAN (R 3.6.1)
 KernSmooth     2.23-16 2019-10-15 [2] CRAN (R 3.6.2)
 knitr        * 1.28    2020-02-06 [1] CRAN (R 3.6.2)
 labeling       0.3     2014-08-23 [1] CRAN (R 3.6.0)
 lifecycle      0.2.0   2020-03-06 [1] CRAN (R 3.6.3)
 magrittr     * 1.5     2014-11-22 [1] CRAN (R 3.6.1)
 mapdata      * 2.3.0   2018-03-30 [1] CRAN (R 3.6.2)
 maps         * 3.3.0   2018-04-03 [1] CRAN (R 3.6.2)
 memoise        1.1.0   2017-04-21 [1] CRAN (R 3.6.2)
 munsell        0.5.0   2018-06-12 [1] CRAN (R 3.6.1)
 pillar         1.4.3   2019-12-20 [1] CRAN (R 3.6.2)
 pkgbuild       1.0.6   2019-10-09 [1] CRAN (R 3.6.2)
 pkgconfig      2.0.3   2019-09-22 [1] CRAN (R 3.6.1)
 pkgload        1.0.2   2018-10-29 [1] CRAN (R 3.6.2)
 plyr           1.8.6   2020-03-03 [1] CRAN (R 3.6.3)
 prettyunits    1.1.1   2020-01-24 [1] CRAN (R 3.6.2)
 processx       3.4.1   2019-07-18 [1] CRAN (R 3.6.2)
 progress       1.2.2   2019-05-16 [1] CRAN (R 3.6.1)
 ps             1.3.0   2018-12-21 [1] CRAN (R 3.6.1)
 purrr          0.3.2   2019-03-15 [1] CRAN (R 3.6.1)
 R6             2.4.1   2019-11-12 [1] CRAN (R 3.6.2)
 RColorBrewer   1.1-2   2014-12-07 [1] CRAN (R 3.6.0)
 Rcpp           1.0.2   2019-07-25 [1] CRAN (R 3.6.1)
 readr          1.3.1   2018-12-21 [1] CRAN (R 3.6.1)
 readxl         1.3.1   2019-03-13 [1] CRAN (R 3.6.1)
 remotes        2.1.0   2019-06-24 [1] CRAN (R 3.6.2)
 rlang          0.4.6   2020-05-02 [1] CRAN (R 3.6.2)
 rmarkdown      2.1     2020-01-20 [1] CRAN (R 3.6.2)
 rprojroot      1.3-2   2018-01-03 [1] CRAN (R 3.6.2)
 scales         1.1.0   2019-11-18 [1] CRAN (R 3.6.2)
 sessioninfo    1.1.1   2018-11-05 [1] CRAN (R 3.6.2)
 sf           * 0.9-3   2020-05-04 [1] CRAN (R 3.6.3)
 stringi        1.4.4   2020-01-09 [1] CRAN (R 3.6.2)
 stringr        1.4.0   2019-02-10 [1] CRAN (R 3.6.1)
 testthat       2.3.1   2019-12-01 [1] CRAN (R 3.6.2)
 tibble         3.0.1   2020-04-20 [1] CRAN (R 3.6.3)
 tidyr        * 1.0.2   2020-01-24 [1] CRAN (R 3.6.2)
 tidyselect     0.2.5   2018-10-11 [1] CRAN (R 3.6.1)
 tweenr         1.0.1   2018-12-14 [1] CRAN (R 3.6.2)
 units          0.6-6   2020-03-16 [1] CRAN (R 3.6.3)
 usethis        1.5.1   2019-07-04 [1] CRAN (R 3.6.2)
 vctrs          0.2.4   2020-03-10 [1] CRAN (R 3.6.3)
 viridisLite    0.3.0   2018-02-01 [1] CRAN (R 3.6.1)
 withr          2.1.2   2018-03-15 [1] CRAN (R 3.6.1)
 xfun           0.12    2020-01-13 [1] CRAN (R 3.6.2)
 yaml           2.2.0   2018-07-25 [1] CRAN (R 3.6.0)

[1] C:/Users/belangew/Documents/R/win-library/3.6
[2] C:/Program Files/R/R-3.6.2/library